This workbook can be run after the standard workflow. It demonstrates how to use the annotation functions to annotate a dataset that was run through the standard workflow.
In this notebook, we show how to use the annotation built into besca to assign cell types to clusters. We focus on immune cell types and demonstrate the signature-scoring functions.
An alternative, in case an annotated training dataset already exists, is to use the auto-annot module. Please refer to the corresponding tutorial.
import besca as bc
import numpy as np
import pandas as pd
import scanpy.api as sc
import matplotlib.pyplot as plt
from scipy import sparse, io
import os
import time
import logging
import seaborn as sns
# Report package versions via scanpy's logging helper.
sc.logging.print_versions()
# for standard processing, set verbosity to minimum
sc.settings.verbosity = 0 # verbosity: errors (0), warnings (1), info (2), hints (3)
# Default resolution (dpi) passed to scanpy's figure settings.
sc.settings.set_figure_params(dpi=80)
# NOTE(review): presumably a workflow/notebook version tag; it is not used in the visible code -- confirm.
version = '2.8'
# Start timestamp from time.time(), kept so elapsed time can be computed later.
start0 = time.time()
### Plot parameters for publication
def set_pub(small_size=10, medium_size=12, large_size=14, resolution=300):
    """Configure matplotlib rcParams for publication-style figures.

    The defaults reproduce the values that were previously hard-coded, so
    calling ``set_pub()`` without arguments behaves as before.

    Parameters
    ----------
    small_size : int
        Font size for tick labels and legend entries.
    medium_size : int
        Font size for axis labels and legend titles.
    large_size : int
        Font size for axes titles and the figure title.
    resolution : int
        Resolution, in dpi, used when saving figures.
    """
    plt.rcParams['font.weight'] = 'normal'
    # Optional: force a specific sans-serif font family.
    #plt.rc('font', **{'family':'sans-serif','sans-serif':['Helvetica']})
    plt.rc('axes', titlesize=large_size, titleweight="bold")  # axes title
    plt.rc('axes', labelsize=medium_size, labelweight="bold")  # x and y labels
    plt.rc('xtick', labelsize=small_size)  # x tick labels
    plt.rc('ytick', labelsize=small_size)  # y tick labels
    plt.rc('legend', fontsize=small_size, title_fontsize=medium_size)  # legend text and title
    plt.rc('figure', titlesize=large_size, titleweight="bold")  # figure title
    plt.rc('savefig', dpi=resolution)  # higher res outputs
    # 'none' leaves text as text in SVG output instead of converting it to paths.
    plt.rcParams['svg.fonttype'] = 'none'


set_pub()
# Define standardized file paths for this analysis.
root_path = os.getcwd()
bescapath='./src/bescapub/besca/'
analysis_name = 'StdWf1_PRJCA001063_CRC_besca2'
# results_folder must exist before it is used to build the input/output paths
# below. It points at <root>/analyzed/<analysis_name>, the same directory that
# figdir is placed under.
# NOTE(review): confirm this is where the standard workflow wrote its .h5ad.
results_folder = os.path.join(root_path, 'analyzed', analysis_name)
# Output file for the annotated dataset.
results_file = os.path.join(results_folder, analysis_name + '.annotated.updated.h5ad')
# Directory scanpy saves figures into (used by the save= arguments below).
figdir=os.path.join(root_path, 'analyzed', analysis_name+'/figures/')
sc.settings.figdir = figdir
# Name of the adata.obs column holding the cluster assignment.
clusters='leiden'
# Load the dataset from <analysis_name>.h5ad in the results folder.
adata = sc.read_h5ad(os.path.join(results_folder, analysis_name + '.h5ad') )
# Bare expression: shows a summary of adata when run as a notebook cell.
adata
# UMAP coloured by cluster (labels drawn on the plot) and by the 'Cell_type' obs column.
sc.pl.umap(adata, color= [clusters], legend_loc='on data', save='_leiden.svg')
sc.pl.umap(adata, color= ['Cell_type'], save='_origlabel.svg')
### Differential expression: cells of each Cell_type vs. all other cells
DEgenes = bc.tl.dge.get_de(
    adata,
    'Cell_type',
    demethod='wilcoxon',
    topnr=5000,
    logfc=1,
    padj=0.05,
)
### Keep only the top 15 genes (in order of p-val) for 2 cell types and plot expression per cell type
tops = [
    gene
    for cell_type in ('Macrophage cell', 'Stellate cell')
    for gene in DEgenes[cell_type]['Name'][0:15]
]
sc.pl.dotplot(adata, var_names=tops, groupby='Cell_type')
# One can load besca-provided signatures using the function below.
# refined=False is passed explicitly here; see the besca documentation for
# what the refined variant contains.
signature_dict = bc.datasets.load_immune_signatures(refined=False)
# Bare expression: displays the loaded signatures when run as a notebook cell.
signature_dict
Additionally, it is possible to read signatures from a GMT file and compute scanpy scores using the function below.
If the GMT file contains combined signatures (UP and DN), a common score will be computed: $$Total\_SCORE = Score_{UP} - Score_{DN}$$
## Signature file PROVIDED WITH BESCA; make a local copy if you'd like to modify/add your own
gmt_file_anno = bescapath + '/besca/datasets/genesets/CellNames_scseqCMs6_sigs.gmt'
# Alternative: point at a local copy kept alongside the analysis results
#gmt_file_anno= root_path + '/analyzed/'+analysis_name+ 'CellNames_scseqCMs6_sigs.gmt'
bc.tl.sig.combined_signature_score(adata, gmt_file_anno)
# Pick every adata.obs column whose name contains 'scanpy' (presumably the
# score columns written by the call above) and colour the UMAP by each of them.
scores = [column for column in adata.obs.columns if 'scanpy' in column]
sc.pl.umap(adata, color=scores, color_map='viridis')